Geospatial Analysis in R
Data Science CEP Presentation
David Beskow
May 2016
“When should I use R as opposed to GIS software produced by ESRI and other companies?”
| GIS | R | |
|---|---|---|
| Visual interaction | \(\rightarrow\) | Data & model focused |
| Data management | \(\rightarrow\) | Analysis |
| Geometric operations | \(\rightarrow\) | Attributes as important |
| Standard workflows | \(\rightarrow\) | Creativity & innovation |
| Single map production | \(\rightarrow\) | Many (simpler) maps |
| Click, click, click, & click | \(\rightarrow\) | Repeatability (single script) |
| Speed of execution | \(\rightarrow\) | Speed of development |
| Typically expensive | \(\rightarrow\) | Free |
| ggmap \(\rightarrow\) maps/imagery | sp \(\rightarrow\) shapefile | leaflet \(\rightarrow\) interactive |
|---|---|---|
Developed by Edzer Pebesma and Rober Bivand in 2005, it is the base package that defines many of the classes and methods used in geospatial analysis in R:
library(raster) ##Load the Raster Library
usa<-getData('GADM', country='USA', level=1) ##Get the Province Shapefile for the US
plot(usa) ##Plot this shapefilebool <- (coordinates(usa)[,1] > -130) & (coordinates(usa)[,1] < -60)
usa<-usa[bool,]
plot(usa)Converting US Tornado Data to SpatialPointsDataFrame
library(sp)
tor.sp <- tor ##Create new data frame
#promote an ordinary data frame
coordinates(tor.sp) = ~elon + elat ##Identify field names that contain lat/lon
proj4string(tor.sp) <- proj4string(usa) ##Assign projection
#plot on shapefile
plot(usa,col="lightgray") ##plot shapefile
points(tor.sp,col="red") ##plot points (do not need x,y...it is a spatial obj)heatMap(tor.sp,usa,col="red",main="Tornado Data Denstiy")myMap <- get_map(location = "kansas city", zoom = 5) ##Get the map
myMap <- ggmap(myMap, extent = "device") ##Prepare Map
myMap +
stat_density2d(aes(x = elon, y = elat, fill = ..level..,alpha=..level..), ##Density
bins = 10, geom = "polygon", data = tor) +
scale_fill_gradient(low = "black", high = "red")+ ##Gradient
ggtitle("Map of US Tornado Density") ##Titlelibrary(ggmap)
APGMap<-qmap('6008 Jayhawk rd, aberdeen proving grounds, md',
source='google',
maptype='terrain',
zoom=10)| google:satellite | google:roadmap | google:terrain |
|---|---|---|
Create Basic Plot
kansas<-qmap('fort leavenworth, ks', zoom=7) ##Get Map for Kansas
kansas+ ##This plots the Kansas Map
geom_point(aes(x = elon, y = elat), data = tor) ##This adds the points to itIdentify type and width of tornado
kansas+ ##Plot Kansas Map
geom_point(aes(x = elon, y = elat, colour = f,size=wid),data = tor)+ ##Plot points
scale_colour_gradient(low="blue",high="red") ##Color Gradient| Basic | Advanced |
|---|---|
Code for creating Point Density Plot of Australia:
aus.density<-pointdensity(aus.subset,lat_col="lat",lon_col="lon",grid_size=20,radius=50)
map_base <- qmap('Australia', zoom = 4, darken=0.2)
map_base + geom_point(aes(x = lon, y = lat, colour = count,alpha=count),
shape = 16, size = 2, data = aus.density) +
scale_colour_gradient(low = "yellow", high = "red") | Point Density Plot of Australia | Point Density Plot of Sydney |
leaflet(data=sydney) %>%
addTiles() %>% addMarkers(~lon, ~lat
)photographers<-unique(sydney$V2) #ID photographers
final<-list() #Create OJB to store results
for(i in 1:length(photographers)){
temp<-subset(sydney,V2==photographers[i]) #Subset by photographer
temp<-temp[order(temp$V4),] #Order by time
breakpoints <- c(FALSE,diff(temp$V4)>8*60) #Establish break points if diff > 8min
temp$label <- as.numeric(cut.POSIXt(temp$V4, c(min(temp$V4),
temp[breakpoints, "V4"], max(temp$V4)+1))) #Convert factor to label
temp$label<-paste(temp$V2,temp$label,sep="") #Concatenate label with username
final[[i]]<-temp #Store temp data
}
final<-do.call("rbind", final) #Convert from list to dataframe| Flickr Tracks | Understanding the Algorithm |
|---|---|
library(pointdensityP)
H_tor <- pointdensity(df = tor, lat_col = "elat", lon_col = "elon",
grid_size = 50, radius = 100)
map_base <- qmap(location="st louis, mo", source='stamen',maptype="toner",zoom = 5, darken=0.3)
map_base +
geom_point(aes(x = lon, y = lat, colour = count,alpha=count),
shape = 16, size = 2, data = H_tor) +
scale_colour_gradient(low = "yellow", high = "red")library(TeachingDemos)
afg<-getData('GADM', country='AFG', level=1) ##Get the Afghanistan Shapefile
wits_sp<-read.csv("wits_sp.csv",as.is=TRUE) ##Load the WITS data from CSV
##Create the SpatialPointsDataFrame
coordinates(wits_sp)<-c("lon","lat")
proj4string(wits_sp)<-proj4string(afg)
##Now all you have to do is plot it!
plotPiePoly(data = wits_sp, shape=afg, factor = "flag", size = c(0.5, 0.5), legend = TRUE,
main = "Sample Pieplot Over Polygon (WITS Data)")players$lon <- NA; players$lat <- NA
for(i in 1:nrow(players)){
try(temp<-geocode(players$City[i]))
players$lat[i]<-temp$lat
players$lon[i]<-temp$lon
}
state<-usa[grep(myState,usa$VARNAME_1),]
plot(state)
with(players,points(lon,lat,pch=16,col="blue",cex=3*count/max(count)))teams<-read.csv("teams.csv",as.is=TRUE)
players<-read.csv('allplayers.csv',as.is=TRUE)
bestPlayers<-subset(players,PTS.1>10)
library(fields)
mat<-rdist.earth(as.matrix(bestPlayers[,31:32]),as.matrix(teams[,c(4,3)]))How do you extract the closest team with 1-2 lines of code?
Who would Michael Jordan play for? What would his team look like?
bestPlayers$Player[bestPlayers$closeTeam=="Nets"]## [1] "Charles Smith" "Stephon Marbury" "Sam Perkins"
## [4] "Michael Jordan*" "J.R. Smith" "Tom Gugliotta"
## [7] "Tobias Harris" "Lamar Odom" "Charlie Villanueva"
## [10] "Kenny Smith" "Kenny Anderson" "Roy Hibbert"
## [13] "Ryan Gomes"